##### ANALYSIS REPRODUCTION CODE #####

# Article: Do Online Newspapers Promote or Undermine Nation-Building in Divided Societies? Evidence From Africa
# Authors: Evan Lieberman and Andrew Miller
# Year: 2020

### Load Packages
library(stm)
library(stringr)
library(reshape)
library(reporttools)
library(plyr)
library(SnowballC)
library(ggplot2)
library(ggthemes)
library(stargazer)
library(xtable)
library(foreign)

### Set Working Directory 

### Load External Functions
# From http://alexandercoppock.com/commarobust
source("./commarobust.R")
source("./stargazer_helpers.R")

### Set Seed
set.seed(2001)

#### + Load Nigeria Data ####

# Load Nigeria data cleaned above

load("./nigeria_data.RData")

#### + Nigeria Analysis ####

#### ++ Table 1: Online News Consumption of Population ####

tab <- data.frame(round(table(n2$news_internet)/nrow(n2),2))
tab[,2] <- round(tab[,2]*100, 0)
colnames(tab) <- c("Frequency","Percent")
tabx <- xtable(tab, digits=0,
               caption = "Online News Consumption",
               lab="tab:news_consumption")
print(tabx,
      include.rownames=F,
      file="./news_consumption.tex")

#### ++ Table 2: Online News Consumption by Ethnicity ####

eth <- data.frame(
  Ethnicity=n2$ethnicity,
  Consumption=n2$news_internet)

tableNominal(eth,
             group = eth$Consumption,
             font.size = "tiny",
             cumsum=FALSE,
             lab="tab:demographics_ethnicity",
             cap="Online News Consumption by Ethnicity",
             file="./demographics_ethnicity2.tex")

#### ++ Table 3: Typical Ethnic Comments ####

d_hf <- d[which(d$comment_ethnic_hf==1),"comment_text"]
hf <- as.character(d_hf[1465])
d_yo <- d[which(d$comment_ethnic_yoruba==1),"comment_text"]
yo <- as.character(d_yo[4911])
d_igbo <- d[which(d$comment_ethnic_igbo==1),"comment_text"]
igbo <- as.character(d_igbo[2628])
d_ijaw <- d[which(d$comment_ethnic_ijaw==1),"comment_text"]
ijaw <- as.character(d_ijaw[783])

eth_com <- data.frame(
  Ethnicity=c("Hausa-Fulani","Yoruba","Igbo","Ijaw"),
  Comment=c(hf,yo,igbo,ijaw))

tabx <- xtable(eth_com,
               align=c("c", "c", "p{10cm}"),
               label = "tab:typical_comments",
               caption = "Typical Ethnic Comments")
print(tabx,
      include.rownames=F,
      booktabs = T,
      size="\\footnotesize",
      file="./typical_comments.tex")

#### ++ Table 4: OLS Regression of Headlines on Ethnic Comments ####

### Independent Variables
covar.names <- c(
  "Hausa-Fulani (Ethnic)","Yoruba (Ethnic)",
  "Igbo (Ethnic)","Ijaw (Ethnic)",
  "Nigeria (National)",
  "Unemployment (Economic)","Oil (Economic)","Naira (Economic)","Buhari (Political)",
  "Saraki (Political)","PDP (Political)", "APC (Political)","Federal (Political)",
  "Corrupt (Scapegoating)","Crime (Scapegoating)", "Football (Placebo)")
iv <- "title_ethnic_hf+title_ethnic_yoruba+title_ethnic_igbo+title_ethnic_ijaw+title_nigeria+title_unemploy+title_oil+title_naira+title_buhari+title_saraki+title_pdp+title_apc+title_federal+title_corrupt+title_crime+title_placebo_football+title_ethnic_conflict_igbo+title_ethnic_conflict_fulani+title_ethnic_conflict_bk+title_ethnic_conflict_delta+title_relig_christian+title_relig_muslim+title_pub_services"
tops <-  paste(c("top1_bin10", "top2_bin10","top3_bin10","top4_bin10","top5_bin10",
                 "top6_bin10","top7_bin10", "top8_bin10", "top9_bin10", "top10_bin10"),collapse="+")
tops <- paste("+",tops)
iv_top <- paste(iv,tops)

### Regressions
m_eth <- lm(paste("comment_ethnic ~ ",iv_top), art); commarobust(m_eth)
m_hf <- lm(paste("comment_ethnic_hf ~ ",iv_top), art); commarobust(m_hf)
m_yo <- lm(paste("comment_ethnic_yoruba ~ ",iv_top), art); commarobust(m_yo)
m_ib <- lm(paste("comment_ethnic_igbo ~ ",iv_top), art); commarobust(m_ib)
m_ij <- lm(paste("comment_ethnic_ijaw ~ ",iv_top), art); commarobust(m_ij)
m_nig <- lm(paste("comment_nigeria ~ ",iv_top), art); commarobust(m_nig)

### Table
stargazer(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig, type="text",
          se = makerobustseslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          p = makerobustpslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          label = "tab:models_article",
          font.size = "tiny",
          dep.var.labels=c("All Ethnic","Hausa-Fulani","Yoruba","Igbo","Ijaw","Nigeria"),
          covariate.labels=covar.names,
          out="./models_article.tex",
          title = "Relationship between Headline Keywords and the Likelihood of At Least One Ethnic or National Comment in Response to Nigerian News Articles",
          omit=c("top","Constant","ethnic_conflict","relig","pub_service"),
          notes.append = T, 
          notes.align = "r",
          notes = "Models also include control variables for content topic developed with STM models."
)

#### ++ Figure 3: OLS Regression of Headlines on Ethnic Comments ####

### plot
coef_plot <- function(Variable_Names, Variable_Range,
                      xlab, png_file, Axis_Range){
  model1Frame <- data.frame(Variable = Variable_Names,
                            Coefficient = summary(m_eth)$coef[Variable_Range,1],
                            SE = summary(m_eth)$coef[Variable_Range, 2],
                            modelName = "All Ethnic")
  model2Frame <- data.frame(Variable = Variable_Names,
                            Coefficient = summary(m_hf)$coef[Variable_Range,1],
                            SE = summary(m_hf)$coef[Variable_Range, 2],
                            modelName = "Hausa-Fulani")
  model3Frame <- data.frame(Variable = Variable_Names,
                            Coefficient = summary(m_yo)$coef[Variable_Range,1],
                            SE = summary(m_yo)$coef[Variable_Range, 2],
                            modelName = "Yoruba")
  model4Frame <- data.frame(Variable = Variable_Names,
                            Coefficient = summary(m_ib)$coef[Variable_Range,1],
                            SE = summary(m_ib)$coef[Variable_Range, 2],
                            modelName = "Igbo")
  model5Frame <- data.frame(Variable = Variable_Names,
                            Coefficient = summary(m_ij)$coef[Variable_Range,1],
                            SE = summary(m_ij)$coef[Variable_Range, 2],
                            modelName = "Ijaw")
  # Combine these data.frames
  allModelFrame <- data.frame(rbind(model1Frame, model2Frame, model3Frame,
                                    model4Frame, model5Frame))  # etc.
  colnames(allModelFrame)[4] <- "Ethnic Term"
  allModelFrame$Variable <- factor(allModelFrame$Variable,
                                   levels = rev(Variable_Names))
  # Specify the width of your confidence intervals
  interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
  interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier
  # Plot
  zp1 <- ggplot(allModelFrame, aes(color = `Ethnic Term`, shape=`Ethnic Term`))
  zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
  zp1 <- zp1 + geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
                                  ymax = Coefficient + SE*interval1),
                              lwd = 1, position = position_dodge(width = 1/2))
  zp1 <- zp1 + geom_pointrange(aes(x = Variable, y = Coefficient, 
                                   ymin = Coefficient - SE*interval2,
                                   ymax = Coefficient + SE*interval2),
                               lwd = 1/2, position = position_dodge(width = 1/2)
                               #fill = "WHITE"
  )
  zp1 <- zp1 + ylab("Likelihood of Ethnic Comment") + xlab(xlab)
  zp1 <- zp1 + scale_color_grey(start=.25)  + theme_bw() 
  zp1 <- zp1# + theme(legend.title = element_text("Ethnic Term"))
  zp1 <- zp1 + coord_flip() + scale_y_continuous(limits = Axis_Range) 
  ggsave(png_file,zp1,height=12,width=18,units="cm")
}
coef_plot(
  Variable_Names=c("Hausa-Fulani","Yoruba","Igbo","Ijaw"),
  Variable_Range=c(2:5),
  Axis_Range=c(-.1,.7),
  xlab="Ethnic Term in Headline",
  png_file="./models_article_plot_ethnic.png")

#### ++ Table 5: Poisson Model of the Count of Ethnic Comments####

### Regressions
m_eth <- glm(paste("comment_ethnic_freq ~", iv_top), data=art,family="poisson"); commarobust(m_eth)
m_hf <- glm(paste("comment_ethnic_hf_freq ~", iv_top), data=art,family="poisson"); commarobust(m_hf)
m_yo <- glm(paste("comment_ethnic_yoruba_freq ~", iv_top), data=art,family="poisson"); commarobust(m_yo)
m_ib <- glm(paste("comment_ethnic_igbo_freq ~", iv_top), data=art,family="poisson"); commarobust(m_ib)
m_ij <- glm(paste("comment_ethnic_ijaw_freq ~", iv_top), data=art,family="poisson"); commarobust(m_ij)
m_nig <- glm(paste("comment_nigeria_freq ~", iv_top), data=art,family="poisson"); commarobust(m_nig)

### Table
stargazer(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig, # type="text",
          se = makerobustseslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          p = makerobustpslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          label = "tab:models_article_poisson", font.size = "tiny",
          dep.var.labels=c("All Ethnic","Hausa-Fulani","Yoruba","Igbo","Ijaw","Nigeria"),
          covariate.labels=covar.names, 
          out="./models_article_poisson.tex",
          title = "Relationship between Headline Keywords and the Average Change in the Number of Ethnic or National Comments in Response to Nigerian News Articles (Poisson)",
          omit=c("top","Constant","ethnic_conflict","relig","pub_service"),
          notes.append = TRUE, notes.align = "r",
          notes = "Models also include control variables for content topic developed with STM models."
)

#### + Nigeria Supplementary Information Appendix ####

#### ++ SI Table 1: Keyword Table ####

# modifying hausa-fulani b/c analyzed as one term
word_bank[11,1] <- c("hausa|fulani") # hausa-fulani
word_bank <- word_bank[-12,] # remove fulani
data.frame(word_bank)

var_names <- c("Unemployment","Oil", "Naira", 
               "Buhari","Saraki","PDP","APC","Federal",
               "Corrupt","Crime","Hausa-Fulani","Yoruba","Igbo","Ijaw",
               "Football",
               "Biafra Conflict","Herdsman Conflict",
               "Boko Haram Conflict","Delta Conflict","Christian","Muslim","Public Services",
               "Nigeria")
categories <- c(rep("Economic",3),
                rep("Political",5),
                rep("Scapegoating",2),
                rep("Ethnic",4),
                "Placebo",
                rep("Control",7),
                "National"
)
data.frame(categories)
data.frame(var_names)
data.frame(word_bank)
tab <- data.frame(
  Category=categories,
  Variable=var_names,
  Keywords=tolower(gsub(" ", "",word_bank))
)
# special modifications
tab$Keywords <- gsub("\\|",", ",tab$Keywords)
tab$Keywords <- gsub("championsleague","champions league",tab$Keywords)
tab$Keywords <- gsub("worldcup","world cup",tab$Keywords)
tab2 <- tab[c(11:14, 23, 1:10, 14:22,15),]
word_bank_tab <- xtable(tab2,
                        label = "tab:keyword_searches",
                        caption="Keyword Search Parameters",
                        size="scriptsize", include.rownames=FALSE)
print(word_bank_tab,include.rownames = F,size="\\scriptsize",
      file="./keyword_searches.tex")

#### ++ SI Table 2: Most Prevalent STM Word ####

topics <- labelTopics(art.fit10h, n=30)
top <- t(topics$prob) # pulling out probability 
tab <- data.frame(rbind(c(1:10),top[,c(1:10)])[c(2:11),])
names(tab) <- c("Topic 1","Topic 2","Topic 3","Topic 4","Topic 5",
                "Topic 6","Topic 7","Topic 8","Topic 9","Topic 10")
tabx <- xtable(tab,caption="Article Content Topics",label = "tab:topics")
print(tabx,size="scriptsize",include.rownames=FALSE,
      file="./topics1.tex")

#### ++ SI Figure 1: Comments per Article/Ethnic Comments per Article ####

comments_per_article_ethnic <- data.frame(count(d[which(d$comment_ethnic==1),]$THREAD_URL)) # using thread_ids associated with article
comments_per_article <- data.frame(count(d$THREAD_URL)) # using thread_ids associated with article
tab <- merge(comments_per_article, comments_per_article_ethnic, all.x=T, by="x")
colnames(tab) <- c("art","comments","ethnic_comments")
tab_all <- data.frame(table(tab$comments))
tab_eth <- data.frame(table(tab$ethnic_comments))
tab <- merge(tab_all,tab_eth,all=T,by="Var1")
tab[is.na(tab)] <- 0
colnames(tab) <- c("Frequency","All","Ethnic")

### plot
tab2 <- melt(tab,variable.name="Type",value.name = "Number")
colnames(tab2) <- c("Frequency","Type","Number")
png(file='./comments_per_article.png',600,650)
ggplot(data = tab2, aes(x = Frequency, y = Number)) + 
  geom_col() +
  facet_wrap( ~ Type, nrow=2) +
  ylab("Number of Articles with Comment Frequency") +
  xlab("Number of Comments") +
  theme_bw()
dev.off()

#### ++ SI Figure 2: Comments per Commenters/Ethnic Comments per Commenter ####

comments_per_commenter_ethnic <- data.frame(count(d[which(d$comment_ethnic==1),]$author_id)) # using thread_ids associated with article
comments_per_commenter <- data.frame(count(d$author_id)) # using thread_ids associated with article
tab <- merge(comments_per_commenter, comments_per_commenter_ethnic, all.x=T, by="x")
colnames(tab) <- c("commenter","comments","ethnic_comments")
tab_all <- data.frame(table(tab$comments))
tab_eth <- data.frame(table(tab$ethnic_comments))
tab <- merge(tab_all,tab_eth,all=T,by="Var1")
tab[is.na(tab)] <- 0
colnames(tab) <- c("Frequency","All","Ethnic")
tab <- tab[c(1:50),]

### plot
tab2 <- melt(tab,variable.name="Type",value.name = "Number")
colnames(tab2) <- c("Frequency","Type","Number")
png(file='./comments_per_commenter.png',600,650)
ggplot(data = tab2, aes(x = Frequency, y = Number)) + 
  geom_col() +
  facet_wrap( ~ Type, nrow=2) +
  ylab("Number of Commenters with Comment Frequency") +
  xlab("Number of Comments (Up to 50)") +
  theme_bw()
dev.off()

#### ++ SI Table 3: Comparing Commenters and Non-commenters ####

# Changing Name/Adding dummy
com <- d
com$comment <- 1
# Agregregating
com.ag1 <- aggregate(comment_ethnic ~ author_id, com, sum)
com.ag2 <- aggregate(comment_ethnic_hf ~ author_id, com, sum)
com.ag3 <- aggregate(comment_ethnic_yoruba ~ author_id, com, sum)
com.ag4 <- aggregate(comment_ethnic_igbo ~ author_id, com, sum)
com.ag5 <- aggregate(comment_ethnic_ijaw ~ author_id, com, sum)
com.ag6 <- aggregate(comment_likes ~ author_id, com, sum)
com.ag7 <- aggregate(comment_dislikes ~ author_id, com, sum)
com.ag8 <- aggregate(comment_is_flagged ~ author_id, com, sum)
com.ag9 <- aggregate(comment ~ author_id, com, sum)

auth <- merge(auth,com.ag1,all=T,by="author_id")
auth <- merge(auth,com.ag2,all=T,by="author_id")
auth <- merge(auth,com.ag3,all=T,by="author_id")
auth <- merge(auth,com.ag4,all=T,by="author_id")
auth <- merge(auth,com.ag5,all=T,by="author_id")
auth <- merge(auth,com.ag6,all=T,by="author_id")
auth <- merge(auth,com.ag7,all=T,by="author_id")
auth <- merge(auth,com.ag8,all=T,by="author_id")
auth <- merge(auth,com.ag9,all=T,by="author_id")

# authors with at least one ethnic comment
auth$comment_ethnic_atleastone <- ifelse(auth$comment_ethnic>=1,1,0)
# ethnic commenting authors
auth.e <- auth[which(auth$comment_ethnic_atleastone==1),]
auth.ne <- auth[which(auth$comment_ethnic_atleastone==0),] 

auth.tab <- data.frame(rbind(c("Total Commenters",round(nrow(auth.e),2),round(nrow(auth.ne),2)),
                             c("Median number of total comments",round(median(auth.e$comment),2),round(median(auth.ne$comment),2)),
                             c("Mean Disqus reputation score",round(mean(auth.e$author_reputation,na.rm=T),2),round(mean(auth.ne$author_reputation,na.rm=T),2)),
                             c("Percent commenters private profiles",round(mean(auth.e$author_is_private,na.rm=T)*100,2),round(mean(auth.ne$author_is_private,na.rm=T)*100,2)),
                             c("Mean comment likes",round(mean(auth.e$comment_likes,na.rm=T),2),round(mean(auth.ne$comment_likes,na.rm=T),2)),
                             c("Mean comment dislikes",round(mean(auth.e$comment_dislikes,na.rm=T),2),round(mean(auth.ne$comment_dislikes,na.rm=T),2)),
                             c("Mean comments flagged",round(mean(auth.e$comment_is_flagged,na.rm=T),2),round(mean(auth.ne$comment_is_flagged,na.rm=T),2))
))
colnames(auth.tab) <- c("Commenter with Ethnic Comment?","Yes","No")

print(xtable(auth.tab,caption="Commenters with an Ethnic Comments versus All Other Commenters",
      label="tab:com_comp"),
      file="./commenter_comparison.tex",
      include.rownames=FALSE)

#### ++ SI Table 4: News Consumption by Other Demographic Variables ####

dems <- data.frame(
  Gender=as.character(n2$Q101),
  Education=n2$education,
  Employment=as.character(n2$employment),
  Pop_Density=n2$URBRUR,
  Discuss_Pol=n2$politics,
  Internet_News=n2$news_internet
)

tableNominal(dems[,-ncol(dems)], 
             group = dems$Internet_News,
             #print.pval = "chi2",
             font.size="tiny",
             cumsum=FALSE,
             lab="tab:demographics",
             cap="Online News Consumption Demographics among Afrobarameter Respondents",
             file="./demographics.tex")

#### ++ SI Table 5: Random Selection of Comments ####

# note that the output will vary based on your computer's seed
d_eth <- d[which(d$comment_ethnic==1),"comment_text"]
tabx <- xtable(data.frame(Comment=sample(d_eth,10)),
               align=c("c", "p{15cm}"),
               label = "tab:random_comments",
               caption = "Randomly-selected Ethnic Comments")
print(tabx,
      booktabs = T,
      file="./random_comments.tex",
      size="\\scriptsize")

#### ++ SI Table 6: Table of Nigeria Summary Statistics ####

stats <- c(
  # Percent headlines with ethnic term 
  mean(art$title_ethnic_hf),
  mean(art$title_ethnic_yoruba),
  mean(art$title_ethnic_igbo),
  mean(art$title_ethnic_ijaw),
  mean(art$title_nigeria),
  # Percent articles with at least one ethnic comment
  mean(art$comment_ethnic_hf),
  mean(art$comment_ethnic_yoruba),
  mean(art$comment_ethnic_igbo),
  mean(art$comment_ethnic_ijaw),
  mean(art$comment_nigeria),
  # percent ALL comments that are ethnic comments 
  mean(d$comment_ethnic_hf), 
  mean(d$comment_ethnic_yoruba), 
  mean(d$comment_ethnic_igbo), 
  mean(d$comment_ethnic_ijaw), 
  mean(d$comment_nigeria)
)

# Convert to Table
tab <- data.frame(
  Statistic=c("% Headlines with Ethnic Term (Hausa-Fulani)",
              "% Headlines with Ethnic Term (Yoruba)",
              "% Headlines with Ethnic Term (Igbo)",
              "% Headlines with Ethnic Term (Ijaw)",
              "% Headlines with National Term (Nigeria)",
              "% Articles with Ethnic Comment (Hausa-Fulani)",
              "% Articles with Ethnic Comment (Yoruba)",
              "% Articles with Ethnic Comment (Igbo)",
              "% Articles with Ethnic Comment (Ijaw)",
              "% Articles with National Comment (Nigeria)",
              "% Comments with Ethnic Term (Hausa-Fulani)",
              "% Comments with Ethnic Term (Yoruba)",
              "% Comments with Ethnic Term (Igbo)",
              "% Comments with Ethnic Term (Ijaw)",
              "% Comments with National Term (Nigeria)"),
  Amount=stats)
tab$`Amount (%)` <- round(tab$Amount*100,2)

# print table
print(xtable(tab[,c(1,3)],
             caption="Summary Statistics for Nigeria"),
      label="tab:summary_rsa",
      include.rownames=FALSE,
      file="./sum_articles_nigeria.tex")

#### ++ SI Table 7: Logit Models ####

### Regressions
m_eth <- glm(paste("comment_ethnic ~", iv_top), data=art,family=binomial("logit")); commarobust(m_eth)
m_hf <- glm(paste("comment_ethnic_hf ~", iv_top), data=art,family=binomial("logit")); commarobust(m_hf)
m_yo <- glm(paste("comment_ethnic_yoruba ~", iv_top), data=art,family=binomial("logit")); commarobust(m_yo)
m_ib <- glm(paste("comment_ethnic_igbo ~", iv_top), data=art,family=binomial("logit")); commarobust(m_ib)
m_ij <- glm(paste("comment_ethnic_ijaw ~", iv_top), data=art,family=binomial("logit")); commarobust(m_ij)
m_nig <- glm(paste("comment_nigeria ~", iv_top), data=art,family=binomial("logit")); commarobust(m_nig)

### Table
stargazer(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig, # type="text",
          se = makerobustseslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          p = makerobustpslist(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
          label = "tab:models_article_logit",
          font.size = "tiny",
          dep.var.labels=c("All Ethnic","Hausa-Fulani","Yoruba","Igbo","Ijaw","Nigeria"),
          covariate.labels=covar.names,
          out="./models_article_logit.tex",
          title = "Relationship between Headline Keywords and the Likelihood of At Least One Ethnic or National Comment in Response to Nigerian News Articles (Logit)",
          omit=c("top","Constant","ethnic_conflict","relig","pub_service"),
          notes.append = T, 
          notes.align = "r",
          notes = "Models also include control variables for content topic developed with STM models."
)

#### ++ SI Table 8: OLS Religion ####

### Independent Variables
covar.names <- c(
  "Christian (Religion)","Muslim (Religion)",
  "Hausa-Fulani (Ethnic)","Yoruba (Ethnic)",
  "Igbo (Ethnic)","Ijaw (Ethnic)",
  "Nigeria (National)",
  "Unemployment (Economic)","Oil (Economic)",
  "Naira (Economic)","Buhari (Political)",
  "Saraki (Political)","PDP (Political)", 
  "APC (Political)","Federal (Political)",
  "Corrupt (Scapegoating)",
  "Crime (Scapegoating)", "Football (Placebo)"     
)
iv <- "title_relig_christian+title_relig_muslim+title_ethnic_hf+title_ethnic_yoruba+title_ethnic_igbo+title_ethnic_ijaw+title_nigeria+title_unemploy+title_oil+title_naira+title_buhari+title_saraki+title_pdp+title_apc+title_federal+title_corrupt+title_crime+title_placebo_football+title_ethnic_conflict_igbo+title_ethnic_conflict_fulani+title_ethnic_conflict_bk+title_ethnic_conflict_delta+title_pub_services"
tops <-  paste(c("top1_bin10", "top2_bin10","top3_bin10",
                 "top4_bin10","top5_bin10",
                 "top6_bin10","top7_bin10", "top8_bin10",
                 "top9_bin10", "top10_bin10"),collapse="+")
tops <- paste("+", tops)
iv_top <- paste(iv, tops)

### standard article level 
m_ch <- lm(paste("comment_relig_christ ~ ",iv_top), art); commarobust(m_ch)
m_mu <- lm(paste("comment_relig_muslim ~ ",iv_top), art); commarobust(m_mu)

### output table
stargazer(m_ch,m_mu,
          se = makerobustseslist(m_ch,m_mu),
          p = makerobustpslist(m_ch,m_mu),
          label = "tab:models_article_relig",
          font.size = "tiny",
          dep.var.labels=c("Christian","Muslim"),
          covariate.labels=covar.names,
          out="./models_article_relig.tex",
          title = "Relationship between Religious Headline Keywords and the Likelihood of At Least One Religious Comment in Response to Nigerian News Articles",
          omit=c("top","Constant","ethnic_conflict","pub_service"),
          notes.append = T, 
          notes.align = "r")

#### ++ SI Table 9: Nigeria Commenter Fixed Effects ####

### Create dataset of commenters who have done at least one ethnic and one non-ethnic comment
d$comment_nonethnic <- ifelse(d$comment_ethnic==0,1,0)
comFE <- ddply(d[,c("author_id","comment_ethnic","comment_nonethnic")], "author_id", summarise,
               comment_ethnic_total=sum(comment_ethnic),
               comment_nonethnic_total=sum(comment_nonethnic))
comFE_filt <- comFE[which(comFE$comment_ethnic_total>=1,comFE$comment_nonethnic_total>=1),]
d_filt <- d[d$author_id %in% comFE_filt$author_id,]

### merge with article data
art_nocom <- art[,c(1:55,76)]
d_fe <- merge(d_filt, art_nocom, 
              all.x=T, 
              by="THREAD_URL")

# adding fixed effects
iv_fe <- paste(iv_top,"+factor(author_id)")

### Regressions
m_eth <- lm(paste("comment_ethnic ~ ",iv_fe), d_fe)#; commarobust(m_eth)
m_hf <- lm(paste("comment_ethnic_hf ~ ",iv_fe), d_fe)#; commarobust(m_hf)
m_yo <- lm(paste("comment_ethnic_yoruba ~ ",iv_fe), d_fe)#; commarobust(m_yo)
m_ib <- lm(paste("comment_ethnic_igbo ~ ",iv_fe), d_fe)#; commarobust(m_ib)
m_ij <- lm(paste("comment_ethnic_ijaw ~ ",iv_fe), d_fe)#; commarobust(m_ij)
m_nig <- lm(paste("comment_nigeria ~ ",iv_fe), d_fe)#; commarobust(m_nig)

### Table
library(texreg)
texreg(list(m_eth,m_hf,m_yo,m_ib,m_ij,m_nig),
       omit.coef = "author_id|top|Intercept|ethnic_conflict|relig|pub_service",
       label = "tab:models_article_fe",
       fontsize = "tiny",
       custom.model.names=c("All Ethnic","Hausa-Fulani","Yoruba","Igbo","Ijaw","Nigeria"),
       custom.coef.names=covar.names,
       file="./models_article_fe2.tex",
       caption = "Relationship Between Headline Keywords and the Likelihood of Ethnic or National Terms Appearing in Comments in Response to Nigerian News Articles (Commenter Fixed Effects)"
)

#### + Load South Africa Data ####

# South Africa data pulled from broader dataset of African countries 

load("./south_africa_data.RData")

#### + South Africa Analysis ####

#### ++ Table 6: OLS Regression of Headlines ####

iv <- "+headline_econ+headline_politics+headline_scape+elect.prox30"
iveth <- paste("headline_ethnic2_white+headline_ethnic2_black+headline_ethnic2_indian+headline_ethnic2_zulu+headline_ethnic2_other+headline_sa",iv)
cov_labels <- c("White","Black","Indian","Zulu","Other Ethnic",
                "South Africa",
                "Economic","Political","Scapegoating",
                "Election Proximity")

### Regressions
m1c <- lm(paste("comment_ethnic2_white ~ ", iveth), art.sa); summary(m1c)
m2c <- lm(paste("comment_ethnic2_black ~ ", iveth), art.sa); summary(m2c)
m3c <- lm(paste("comment_ethnic2_indian ~ ", iveth), art.sa); summary(m3c)
m4c <- lm(paste("comment_ethnic2_zulu ~ ", iveth), art.sa); summary(m4c)
m5c <- lm(paste("comment_ethnic2_other ~ ", iveth), art.sa); summary(m5c)
m6c <- lm(paste("comment_sa ~ ", iveth), art.sa); summary(m6c)

### Table
stargazer(m1c,m2c,m3c,m4c,m5c,m6c, type = "text",
          se = makerobustseslist(m1c,m2c,m3c,m4c,m5c,m6c),
          p = makerobustpslist(m1c,m2c,m3c,m4c,m5c,m6c),
          label = "tab:models_article_rsa",
          dep.var.caption = "At Least One Ethnic/Racial Comment",
          dep.var.labels = c("White","Black","Indian","Zulu","Other Ethnic","South Africa"),
          covariate.labels = cov_labels,
          font.size = "tiny",
          keep.stat = c("n","rsq"),
          out="./art_head_rsa.tex",
          omit="Constant",
          title = "Relationship between Headline Keywords and the Likelihood of At Least One Ethnic/Racial or National Comment in Response to South African News Articles"#,

)

#### ++ Table 7: Poisson Model Regression ####

### Regressions
m1c <- glm(paste("comment_ethnic2_white_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m1c)
m2c <- glm(paste("comment_ethnic2_black_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m2c)
m3c <- glm(paste("comment_ethnic2_indian_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m3c)
m4c <- glm(paste("comment_ethnic2_zulu_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m4c)
m5c <- glm(paste("comment_ethnic2_other_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m5c)
m6c <- glm(paste("comment_sa_freq ~",iveth), 
           data=art.sa,family="poisson");summary(m6c)

### Table
stargazer(m1c,m2c,m3c,m4c,m5c,m6c, type="text",
          se = makerobustseslist(m1c,m2c,m3c,m4c,m5c,m6c),
          p = makerobustpslist(m1c,m2c,m3c,m4c,m5c,m6c),
          label = "tab:models_article_poisson_rsa",
          dep.var.caption = "Number of Ethnic/Racial Comments",
          dep.var.labels = c("White","Black","Indian","Zulu","Other Ethnic","South Africa"),
          covariate.labels = cov_labels,
          font.size = "tiny",
          keep.stat = c("n","rsq"),
          out="./art_head_poisson_rsa_corrected.tex",
          omit="Constant",
          title = "Relationship between headline keywords and the average change in the number of ethnic/racial or national comments in response to South African news articles (Poisson)"
)

#### + South Africa Supplementary Information Appendix ####

#### ++ SI Table 10: South Africa Keywords ####

### Ethnicity Breakdown
### White
ethnic.terms.sa_white <- paste(c("English","Afrikaans",
                                 "Afrikaner","Boer","White","European"), collapse = "|")
### Black
ethnic.terms.sa_black <- paste(c("Black"), collapse = "|")
### Indian
ethnic.terms.sa_indian <- paste(c("Indian"), collapse = "|")
### Zulu 
ethnic.terms.sa_zulu <- paste(c(" Zulu"), collapse = "|")
### Other
ethnic.terms.sa_other <- paste(c("Venda","Ndebele",
                                 "Xhosa","Pedi",
                                 "North Sotho","Sotho",
                                 "South Sotho","Tswana","Coloured",
                                 "Shangaan","Swazi"), collapse = "|")
# election terms
election.terms.generic <- c("elect","vote","voting","poll")
election.terms.sa <- c("African Christian Democratic Party", "ACDP","African Muslim Party",
                       "AMP","African National Congress", "ANC","Azanian People’s Organization", "AZAPO",
                       "Congress of the People", "COPE","Democratic Alliance", "DA","Freedom Front Plus",
                       "Vryheidsfront Plus", "VF Plus","Independent Democrats", "ID","Inkatha Freedom Party", "IFP",
                       "Minority Front", "MF","National Democratic Convention", "NADECO","New National Party",
                       "Nuwe Nasionale Party", "NNP","Pan Africanist Congress", "PAC",
                       "United Christian Democratic Party", "UCDP","United Democratic Movement", "UDM",
                       "United Independent Front", "UIF","Economic Freedom Fighters", "EFF","African Independent Congress",
                       "Agang SA","Al Jama-ah","Bushbuckridge Residents Association","First Nation Liberation Alliance",
                       "Front Nasionaal","Independent Civic Organisation of South Africa", "ICOSA",
                       "Keep It Straight and Simple", "KISS","Kingdom Governance Movement","National Freedom Party", "NFP",
                       "Pan Africanist Movement", "PAM","Patriotic Alliance","Peoples Alliance",
                       "Ubuntu Party","United Congress","Workers and Socialist Party", "WASP")
# add whitespace to parties
for(i in 1:length(election.terms.sa)){
  term <- election.terms.sa[i]
  term.ws <- str_pad(term, width = nchar(term)+2, side = "both")
  election.terms.sa[i] <- term.ws
}
# Add generic terms
election.terms.sa <- c(election.terms.sa,election.terms.generic)
# Create vector for IDing words
election.terms.sa_grep <- paste(election.terms.sa, collapse = "|")
### Scapegoat Terms
scape.terms.generic <- c("corrupt", "bribe", "embezzle", "crime", "police", 
                         "arrest", "murder","kill","justice","kidnap",
                         "robber","shooting")
scape.terms.sa <- scape.terms.generic
scape.terms.sa_grep <- paste(scape.terms.sa, collapse = "|")
### Class Terms
class.terms.generic <- c("worker", "poor", "rich", "wealthy", "poverty", 
                         "middle class", "upper class","lower class",
                         "white collar","blue collar","labor")
class.terms.sa <- class.terms.generic
class.terms.sa_grep <- paste(class.terms.sa, collapse = "|")
### Nation Terms
nation.terms.generic <- c("south africa","rsa")
nation.terms.sa <- nation.terms.generic
nation.terms.sa_grep <- paste(nation.terms.sa, collapse = "|")

### Terms
keywords_rsa <- gsub(" , ",", ",gsub("\\|",", ",c(tolower(c(ethnic.terms.sa_white,ethnic.terms.sa_black,
                                                            ethnic.terms.sa_indian,ethnic.terms.sa_zulu,
                                                            ethnic.terms.sa_other)),
                                                  nation.terms.sa_grep,
                                                  class.terms.sa_grep,
                                                  election.terms.sa_grep,
                                                  scape.terms.sa_grep)))

tab <- data.frame(
  Category=c("White (Race)","Black (Race)","Indian (Ethnicity)","Zulu (Ethnicity)","Other (Race/Ethnicity)",
             "South Africa (National)","Economic","Political","Scapegoating"),
  Keywords=keywords_rsa)

tabx <- xtable(tab,
               align=c("l","r", "p{10cm}"),
               label = "tab:keyword_search_rsa",
               caption = "Keyword Search Parameters for South Africa")

print(tabx, include.rownames=F, size="\\scriptsize",
      file="./keyword_searches_rsa.tex")

#### ++ SI Table 11: South Africa Summary Statistics ####

stats <- c(
  # Percent headlines with ethnic term 
  mean(art.sa$headline_ethnic2_white),
  mean(art.sa$headline_ethnic2_black),
  mean(art.sa$headline_ethnic2_indian),
  mean(art.sa$headline_ethnic2_zulu),
  mean(art.sa$headline_ethnic2_other),
  mean(art.sa$headline_sa),
  # Percent articles with at least one ethnic comment
  mean(art.sa$comment_ethnic2_white),
  mean(art.sa$comment_ethnic2_black),
  mean(art.sa$comment_ethnic2_indian),
  mean(art.sa$comment_ethnic2_zulu),
  mean(art.sa$comment_ethnic2_other),
  mean(art.sa$comment_sa),
  ### percent of comments with national term
  mean(com.sa$comment_ethnic2_white),
  mean(com.sa$comment_ethnic2_black),
  mean(com.sa$comment_ethnic2_indian),
  mean(com.sa$comment_ethnic2_zulu),
  mean(com.sa$comment_ethnic2_other),
  mean(com.sa$comment_sa))

# Convert to Table
tab <- data.frame(
  Statistic=c("% Headlines with Ethnic Term (White)",
              "% Headlines with Ethnic Term (Black)",
              "% Headlines with Ethnic Term (Indian)",
              "% Headlines with Ethnic Term (Zulu)",
              "% Headlines with Ethnic Term (Other)",
              "% Headlines with National Term (South Africa)",
              "% Articles with Ethnic Comment (White)",
              "% Articles with Ethnic Comment (Black)",
              "% Articles with Ethnic Comment (Indian)",
              "% Articles with Ethnic Comment (Zulu)",
              "% Articles with Ethnic Comment (Other)",
              "% Articles with Ethnic Comment (South Africa)",
              "% Comments with Ethnic Term (White)",
              "% Comments with Ethnic Term (Black)",
              "% Comments with Ethnic Term (Indian)",
              "% Comments with Ethnic Term (Zulu)",
              "% Comments with Ethnic Term (Other)",
              "% Comments with National Term (South Africa)"),
  Amount=stats)
tab$`Amount (%)` <- round(tab$Amount*100,2)

print(xtable(tab[,c(1,3)],caption="Summary Statistics for South Africa"),
      label="tab:summary_rsa",
      include.rownames=FALSE,
      file="./sum_articles_rsa.tex")

#### ++ SI Table 12: OLS Regression without Controls####

iv_nc <- "headline_ethnic2_white+headline_ethnic2_black+headline_ethnic2_indian+headline_ethnic2_zulu+headline_ethnic2_other"

### Regressions
m1c <- lm(paste("comment_ethnic2_white ~ ", iv_nc), art.sa); summary(m1c)
m2c <- lm(paste("comment_ethnic2_black ~ ", iv_nc), art.sa); summary(m2c)
m3c <- lm(paste("comment_ethnic2_indian ~ ", iv_nc), art.sa); summary(m3c)
m4c <- lm(paste("comment_ethnic2_zulu ~ ", iv_nc), art.sa); summary(m4c)
m5c <- lm(paste("comment_ethnic2_other ~ ", iv_nc), art.sa); summary(m5c)
m6c <- lm(paste("comment_sa ~ ", iv_nc), art.sa); summary(m6c)

### Table
stargazer(m1c,m2c,m3c,m4c,m5c,m6c, # type="text",
          se = makerobustseslist(m1c,m2c,m3c,m4c,m5c,m6c),
          p = makerobustpslist(m1c,m2c,m3c,m4c,m5c,m6c),
          label = "tab:models_article_rsa_nocontrols",
          dep.var.caption = "At Least One Ethnic/Racial Comment",
          dep.var.labels = c("White","Black","Indian","Zulu","Other Ethnic","South Africa"),
          covariate.labels = cov_labels[1:5],
          font.size = "tiny",
          keep.stat = c("n","rsq"),
          out="./art_head_rsa_nocontrols.tex",
          omit="Constant",
          title = "Article Headline Effect on Comment Content  (No Controls)"
          
)

#### ++ SI Table 13: South Africa Logit Models ####

### Regressions
m1c <- glm(paste("comment_ethnic2_white ~ ", iveth), art.sa,family=binomial("logit")); summary(m1c)
m2c <- glm(paste("comment_ethnic2_black ~ ", iveth), art.sa,family=binomial("logit")); summary(m2c)
m3c <- glm(paste("comment_ethnic2_indian ~ ", iveth), art.sa,family=binomial("logit")); summary(m3c)
m4c <- glm(paste("comment_ethnic2_zulu ~ ", iveth), art.sa,family=binomial("logit")); summary(m4c)
m5c <- glm(paste("comment_ethnic2_other ~ ", iveth), art.sa,family=binomial("logit")); summary(m5c)
m6c <- glm(paste("comment_sa ~ ", iveth), art.sa, family=binomial("logit")); summary(m6c)

### Table
stargazer(m1c,m2c,m3c,m4c,m5c,m6c,  # type="text",
          se = makerobustseslist(m1c,m2c,m3c,m4c,m5c,m6c),
          p = makerobustpslist(m1c,m2c,m3c,m4c,m5c,m6c),
          label = "tab:models_article_rsa_logit",
          dep.var.caption = "At Least One Ethnic/Racial Comment",
          dep.var.labels = c("White","Black","Indian","Zulu","Other Ethnic","South Africa"),
          covariate.labels = cov_labels,
          font.size = "tiny",
          keep.stat = c("n","rsq"),
          out="./art_head_rsa_logit.tex",
          omit="Constant",
          title = "Article Headline Effect on Comment Content (Logit)")
